* Firm organization with multiple establishments
* Section III.A data preparation

clear all 
set matsize 2000
set more off, perm

capture log close
log using log/03_facts-cs_MElocation_data.log, replace

********************************************************************************
***	CALCULATE MARKET POTENTIAL

use data/Distanzen_v2014.dta, clear

gen jahr = 2012
merge m:1 ao_kreis jahr using data/Kreis_controls.dta, keep(1 3)
tab ao_kreis if _merge == 1
drop _merge

gen w_BIP = (1/distance_all)*BIP
replace w_BIP = BIP if hq_kreis == ao_kreis
bys hq_kreis: egen double MP = total(w_BIP)
label variable MP "Market potential, average distance weighted"

drop ao_kreis
rename hq_kreis ao_kreis

keep ao_kreis MP
duplicates drop
save data/marketpotential.dta, replace

********************************************************************************
***	PREPARE DATA FOR REGRESSION

use data/Panel.dta if jahr == 2012, clear

keep untid betnr jahr persnr tentgelt

merge m:1 betnr jahr using data/BHPinclUntID.dta, keep(3) keepusing(ao_kreis)
drop _merge

bys ao_kreis jahr: egen wag_kreis = total(tentgelt)
bys ao_kreis jahr: egen emp_kreis = count(persnr)

bys untid ao_kreis jahr: egen wag_kreis_unt = total(tentgelt)
bys untid ao_kreis jahr: egen emp_kreis_unt = count(persnr)

gen s_unt = emp_kreis_unt / emp_kreis

gen mean_wage = wag_kreis/emp_kreis
gen aux = wag_kreis_unt/emp_kreis

gen mean_wage2 = (mean_wage - aux) / (1-s_unt)
gen diff = mean_wage - mean_wage2
count if diff == 0
tabstat diff, s(n mean sd min p5 p10 q p90 p95 max)
tabstat emp_kreis_unt if diff < -1 | diff > 1, s(n mean sd p5 p10 q p90 p95)
tabstat emp_kreis_unt if diff >= -1 & diff <= 1, s(n mean sd p5 p10 q p90 p95)
//	Large deviations are driven by large firms

keep untid jahr mean_wage* ao_kreis
duplicates drop
isid untid jahr ao_kreis

label variable mean_wage2 "Average wages in year and county excluding firm"
label variable mean_wage  "Average wages in year and county"

save data/wages_untid_aokreis_2012.dta, replace

********************************************************************************
***	FILLIN DATA SET

use data/Panel.dta if jahr == 2012, clear

keep untid betnr jahr ao_kreis persnr

bys untid jahr: egen empl_unt = count(persnr)
bys betnr jahr: egen empl_bet = count(persnr)

drop persnr
duplicates drop

isid betnr jahr

//	Restrict sample
bys untid jahr: egen count_est = count(betnr)
keep if count_est > 1
keep if empl_unt >= 10
drop count_est

merge 1:1 betnr jahr using data/BHPinclUntID.dta, keep(3) keepusing(hauptbet)
drop _merge

// Rectangularize dataset to give every firm the option to invest in any county
sort untid jahr
egen untjhr_id = group(untid jahr)
gen inv = 1

fillin untjhr_id ao_kreis
replace inv = 0 if _fillin == 1

sort untjhr_id untid jahr _fillin betnr

foreach variable in untid jahr empl_unt {
	bys untjhr_id (`variable'): replace `variable' = `variable'[_n-1] if `variable' == . & `variable'[_n-1] != . 
	count if `variable' == .
}

********************************************************************************
***	MERGE COVARIATES

//	Firm/establishment characteristics
merge m:1 betnr jahr using data/BHPinclUntID.dta, keep(1 3) keepusing(hq_kreis Rechtsform_neu w08_5 w93_3_gen)
drop _merge
count if Rechtsform_neu == . & inv == 1

qui gen w08_3 = int(w08_5 / 100)
drop w08_5

qui gen aux_hq_wz = w08_3 if hauptbet == 1
bys untid jahr: egen hq_wz = min(aux_hq_wz)
drop aux_hq_wz w08_3

qui gen aux_hq_wz93 = w93_3_gen if hauptbet == 1
bys untid jahr: egen hq_wz93 = min(aux_hq_wz)
drop aux_hq_wz
order hq_kreis hq_wz hq_wz93, after(hauptbet)

sort untjhr_id untid jahr _fillin betnr

foreach variable in hq_kreis Rechtsform_neu {
	bys untjhr_id (`variable'): replace `variable' = `variable'[_n-1] if `variable' == . & `variable'[_n-1] != . 
	count if `variable' == .
}
compress

qui gen legal_form = .
qui replace legal_form = 1 if (Rechtsform_neu == 8 | Rechtsform_neu == 16 | Rechtsform_neu == 17 | Rechtsform_neu == 19 | Rechtsform_neu == 29)
qui replace legal_form = 2 if (Rechtsform_neu == 3 | Rechtsform_neu == 11 | Rechtsform_neu == 12 | Rechtsform_neu == 14 | Rechtsform_neu == 15)
qui replace legal_form = 3 if (Rechtsform_neu == 13)
qui replace legal_form = 4 if (Rechtsform_neu == 5 | Rechtsform_neu == 22 | Rechtsform_neu == 23 | Rechtsform_neu == 24)
label define legal 1 "Einzelunternehmen" 2 "GmbH & coKG" 3 "GmbH" 4 "AG"
label values legal_form legal
tab legal_form, gen(d_legal)
drop d_legal1

//	Aggregate at county level
collapse (sum) empl_kreis=empl_bet (mean) avg_empl_bet=empl_bet inv d_legal*, by(untid jahr hq_wz hq_wz93 hq_kreis empl_unt ao_kreis)
count
count if inv == 1

//	Distance
merge m:1 ao_kreis hq_kreis using data/Distanzen_v2014.dta, keep(1 3) keepusing(distance_all)
drop _merge

gen ldistall = ln(distance_all)

//	Market potential
merge m:1 ao_kreis using data/marketpotential.dta, keep(1 3)
drop _merge
gen ln_MP = log(MP)

//	Wages
merge 1:1 untid jahr ao_kreis using data/wages_untid_aokreis_2012.dta, keep(1 3)
drop _merge

rename mean_wage aux
bys ao_kreis jahr: egen mean_wage = max(aux)
drop aux
qui replace mean_wage2 = mean_wage if inv == 0
count if mean_wage != mean_wage2
count if mean_wage != mean_wage2 & inv != 1
drop mean_wage

rename mean_wage2 mean_wage

gen aux = mean_wage if ao_kreis == hq_kreis
bys untid jahr: egen hq_mean_wage = max(aux)
drop aux
gen rel_wage =  mean_wage/hq_mean_wage

//	Land prices
merge m:1 ao_kreis jahr using data/Kreis_controls.dta, keep(1 3) keepusing(Preis_pqm_baureif)
drop _merge

qui gen aux = Preis_pqm_baureif if ao_kreis == hq_kreis
qui egen hq_Preis_pqmbau = max(aux), by(untid jahr)
drop aux
qui gen rel_Preis =  Preis_pqm_baureif /hq_Preis_pqmbau
drop hq_Preis_pqmbau

keep untid jahr empl_unt hq_kreis hq_wz hq_wz93 d_legal2 d_legal3 d_legal4 ao_kreis inv avg_empl_bet empl_kreis distance_all ldistall ln_MP rel_wage rel_Preis
order untid jahr empl_unt hq_kreis hq_wz hq_wz93 d_legal2 d_legal3 d_legal4 ao_kreis inv avg_empl_bet empl_kreis distance_all ldistall ln_MP rel_wage rel_Preis

qui tab hq_wz,   gen(d_08wz) // hq_wz is based on w08_3
qui tab hq_wz93, gen(d_93wz) 
qui tab ao_kreis, gen(d_ao_kreis)
qui tab hq_kreis, gen(d_hq_kreis)

save data/MEloc_2012_cs.dta, replace
erase data/marketpotential.dta
erase data/wages_untid_aokreis_2012.dta

log close
